Set Up

Install/load necessary R packages

Read in water year data from CSV files created in 1_water_years.Rmd and stored in the intermediate_data directory

# Load the per-water-year tables and the combined streamflow table from the
# project's intermediate_data directory.
intermediate_dir <- here("intermediate_data")

wy2017_clean <- read_csv(file.path(intermediate_dir, "wy2017_clean.csv"))
wy2018_altered <- read_csv(file.path(intermediate_dir, "wy2018_altered.csv"))
wy2019_clean <- read_csv(file.path(intermediate_dir, "wy2019_clean.csv"))
all_streamflow <- read_csv(file.path(intermediate_dir, "all_streamflow.csv"))

Set date range

# First and last calendar year (inclusive) used to subset the manual checks;
# both values are also pasted into the plot titles.
min_year <- 2017
max_year <- 2021

Create sub-directories if necessary

# Create `path` (and any missing parent directories) unless it already exists.
# When the directory is already present, print a notice naming it instead.
# Returns `path` invisibly so the call can be assigned.
ensure_dir <- function(path) {
  if (dir.exists(path)) {
    print(paste0(basename(path), " directory already exists!"))
  } else {
    dir.create(path, recursive = TRUE)
  }
  invisible(path)
}

# Both directories are resolved from the project root via here().
output_dir <- ensure_dir(here("intermediate_data"))
## [1] "intermediate_data directory already exists!"
output_dir <- ensure_dir(here("figures"))
## [1] "figures directory already exists!"

Manual Streamflow Checks (2017 - 2021)

  • Read in the manual streamflow checkpoints from the External-MEF_DATA Box folder
    • The raw data is found at the following file path: External-MEF_DATA/Hydro/Streamflow/L0_subdaily/ManualChecks
  • Clean and manipulate the data into a tidy format
    • Renaming columns to use lower_snake_case and avoid special characters via clean_names()
    • Convert the collected column from a character into a POSIXct date format
    • Remove rows with NA values in the stripchart_stage column
  • Subset the data to include only the data of interest (S2 weir data) and the time period of interest (2017 - 2021, as set by min_year and max_year)
  • Plot the manual checks data
#create file path to call the data from Box
# NOTE(review): this absolute path is specific to one user's machine; anyone
# else running the file must edit it first.
## Mia's file path
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/ManualChecks"

#read in the manual checks data
# `filepath` is absolute and lies outside the project, so the file name is
# appended with file.path() rather than here(), which builds paths relative
# to the project root.
mc <- read_csv(file.path(filepath, "2017-2021_S2Stage.csv"))

#clean the data
# Steps: standardise column names, keep the S2 weir rows, parse the collection
# timestamp, derive `year` and `date` (both kept as character columns), keep
# only years within [min_year, max_year], and drop rows with no stage reading.
mc_clean <- mc %>%
  clean_names() %>%
  #remove S2 lagging pool data and keep only the S2 weir data
  subset(name == "S2 WEIR") %>%
  mutate(
    collected = as.POSIXct(collected, format = "%m/%d/%Y %H:%M", tz = "GMT"),
    # `collected` is POSIXct from here on, so it is formatted directly rather
    # than being passed through as.POSIXct() a second time
    year = format(collected, format = "%Y", tz = "GMT"),
    date = format(collected, format = "%Y-%m-%d", tz = "GMT")
  ) %>%
  # compare years as numbers; `year` itself stays character, and comparing it
  # to the numeric bounds directly would silently fall back to string ordering
  subset(as.numeric(year) >= min_year & as.numeric(year) <= max_year) %>%
  subset(!is.na(stripchart_stage))

# Persist the cleaned manual checks as a CSV (without row names) in the
# intermediate_data directory so future RMD files can reuse them.
mc_clean_path <- here("intermediate_data", "mc_clean.csv")
write.csv(mc_clean, file = mc_clean_path, row.names = FALSE)

# Scatter plot of the manual stage readings against collection time.
mc_plot_title <- paste0(
  "S2 Weir Manual Streamflow Checks (", min_year, "-", max_year, ")"
)

ggplot(data = mc_clean) +
  geom_point(mapping = aes(x = collected, y = stripchart_stage)) +
  theme_classic() +
  # centre the title
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = mc_plot_title,
    x = "Time",
    y = "Stream Height (ft)"
  )

Streamflow + Manual Checks (2017 - 2021)

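  • Use the combined streamflow data for all 3 water years (all_streamflow, read in during Set Up; it does not need to be rebuilt with rbind() in this file)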

  • Plot stripchart data (as lines) then add the manual checks (as points) on top

# Aesthetic mappings for the two layers. The extra `text` aesthetic carries the
# hover label that the interactive plot displays (ggplotly, tooltip = "text").
stripchart_aes <- aes(
  x = datetime,
  y = stream_height_ft,
  group = 1,
  text = paste0("DateTime: ", datetime, "\n",
                "Stream Height (ft): ", round(stream_height_ft, digits = 3))
)
manual_check_aes <- aes(
  x = collected,
  y = stripchart_stage,
  group = 1,
  text = paste0("DateTime: ", collected, "\n",
                "Stream Height Checkpoint (ft): ", round(stripchart_stage, digits = 3))
)

p_all_title <- paste0("S2 Bog Stream Height (", min_year, "-", max_year, ")")

# Stripchart record drawn as a thin line, manual checks drawn on top as red
# points.
p_all <- ggplot(data = all_streamflow) +
  geom_line(mapping = stripchart_aes, size = 0.25) +
  geom_point(data = mc_clean,
             mapping = manual_check_aes,
             color = "red",
             size = 0.5) +
  theme_classic() +
  # centre the title and subtitle
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5)) +
  labs(
    title = p_all_title,
    subtitle = "Stripchart data are plotted as black lines. Manual checks are plotted as red dots.",
    x = "Time",
    y = "Stream Height (ft)"
  )

#static plot
#p_all

#save PNG file
# The output directory is resolved with here() so the PNG is written to the
# project-root figures directory (the one created during set up) no matter
# which working directory the document is knit from.
ggsave(filename = "streamflow_with_manual_checks.png",
       plot = p_all,
       path = here("figures"),
       width = 6,
       height = 3,
       units = "in",
       dpi = 300)

#interactive plot; the hover labels come from the `text` aesthetic
ggplotly(p_all, tooltip = "text")

1:1 Stripchart vs Manual Checkpoints Comparison (2017 - 2021)

Next, we are interested in a 1:1 comparison of stripchart data vs manual checkpoint data at exactly the same timestamps. Since the stripchart timestamps and the manual checkpoint timestamps do not align perfectly, we will linearly interpolate the stripchart streamflow values using the na.approx() function from the zoo package to estimate stripchart values at the times of the manual checks.

  • Clean the data

  • Note that the manual checks data only range from 2017-04-04 to 2019-12-31, which is a smaller time range than the stripchart data

#clean the manual checks data
# Assumes mc_clean is the in-memory table built earlier in this file, where
# `collected` is already POSIXct (GMT).
mc_sub <- mc_clean %>%
  mutate(
    # calendar day of each check (timestamp dropped), stored as POSIXct at
    # midnight GMT. `collected` is formatted directly: passing a POSIXct value
    # back through as.POSIXct() with a parse format leaves it unchanged.
    date = as.POSIXct(format(collected, format = "%Y-%m-%d", tz = "GMT"),
                      tz = "GMT"),
    year = as.numeric(year)
  ) %>%
  #rename column
  rename(datetime = collected) %>%
  #remove unnecessary columns
  select(-site, -lab_id, -name, -point_gage, -logger_stage)

#identify date ranges of interest: 2017-04-04 to 2019-12-31
##aka the range of the manual checkpoint data
mc_date_span <- range(mc_sub$date)
min_date <- mc_date_span[1]
max_date <- mc_date_span[2]
nrows <- nrow(mc_sub)

# Trim the stripchart record to the span of the manual checks and add a numeric
# `year` column derived from `datetime`.
streamflow_sub <- all_streamflow %>%
  mutate(
    year = as.numeric(
      format(as.POSIXct(datetime, format = '%m/%d/%Y %H:%M:%S', tz = "GMT"),
             format = '%Y')
    )
  ) %>%
  subset(date >= min_date & date <= max_date)
  • Join the manual checks dataset with the stripchart dataset

  • Plot the data to visually examine if the approximated values generated by na.approx() look correct

#join the stripchart data and manual checks data 
# full_join() keeps every row of both tables; rows match only when datetime,
# date and year are all equal, so timestamps present in just one table get NA
# in the other table's measurement column.
fj <- full_join(x = mc_sub, 
                y = streamflow_sub, 
                by = c("datetime", "date", "year")) %>% 
  #rename columns 
  rename(manual_check = stripchart_stage,
         stripchart = stream_height_ft) %>% 
  #rearrange dataframe into a long format
  # the two measurement columns are stacked into one `stream_height_ft` column,
  # with `types` recording which source each value came from
  pivot_longer(cols = c("manual_check", "stripchart"),
               names_to = "types",
               values_to = "stream_height_ft") %>%
  #interpolate to fill in missing NA values
  # linear interpolation of stream_height_ft against datetime; maxgap = 6 is
  # zoo's limit on the run of consecutive NAs that will be filled
  # NOTE(review): the interpolation runs over the stacked long column, so
  # manual-check and stripchart values share one series and the row order is
  # whatever full_join() + pivot_longer() produced - confirm this is intended.
  # NOTE(review): na.approx() defaults to na.rm = TRUE, which can trim
  # leading/trailing NAs and return a vector shorter than the input - confirm
  # that cannot happen with this data.
  mutate(approx = na.approx(object = stream_height_ft,
                             x = datetime,
                            method = "linear",
                            maxgap = 6),
         #calculate the difference between approximated values and real values
         diff = approx - stream_height_ft) 

##visually examine the approximated values 
# ggplot(data = fj) +
#   geom_line(aes(x = datetime, y = approx))
# 
# ggplot(data = fj) +
#   geom_point(aes(x = datetime,
#                 y = stream_height_ft,
#                 color = types))
# 
# ggplot(data = fj) +
#   geom_point(aes(x = datetime,
#                 y = stream_height_ft,
#                 color = types)) +
#   geom_point(aes(x = datetime,
#                 y = approx),
#              alpha = 0.05)


# Drop the raw values and the difference column so that only the interpolated
# values (`approx`) remain alongside the identifying columns.
fj_interpolated <- select(fj, -stream_height_ft, -diff)

# Back to wide format for the scatterplot: one column per entry in `types`,
# filled with the interpolated values.
fj_wide <- pivot_wider(fj_interpolated,
                       names_from = "types",
                       values_from = "approx")

# Flatten the columns created by pivot_wider().
# NOTE(review): unnest() is called without `cols`, which newer tidyr versions
# warn about - confirm which columns should be listed.
fj <- unnest(fj_wide)


# #visually examine all data 
# ggplot(data = fj) +
#   geom_point(aes(x = manual_check,
#                  y = stripchart))
  • Subset the joined dataframe to include only the timestamps of interest (AKA the timestamps from the original manual checks dataset)

  • Plot

# Keep only the timestamps of interest: mc_sub is the left table of the join,
# so each manual check is retained and picks up its matching row from fj.
lj <- mc_sub %>%
  left_join(fj, by = c("datetime", "year", "date")) %>%
  # ensure both measurement columns are numeric before differencing
  mutate(
    manual_check = as.numeric(manual_check),
    stripchart = as.numeric(stripchart)
  ) %>%
  mutate(diff = manual_check - stripchart)

#test if the subsetted data has the same number of observations as the manual checks dataframe
if (nrow(lj) != nrow(mc_sub)) {
  stop("Check lj dataframe dimensions")
}

#test if the approximated data differs greatly from the stripchart/manual checks data
# `lj$diff` is a vector, so it is reduced with any() before being handed to
# if(): a condition of length > 1 is an error in R >= 4.2, and earlier versions
# only looked at the first element. NA differences are ignored (na.rm = TRUE)
# so they do not make the condition itself NA.
# NOTE(review): the 0.01 tolerance now applies to every row - confirm the
# threshold is still appropriate.
if (any(abs(lj$diff) > 0.01, na.rm = TRUE)) {
  stop("Check differences")
}

# Persist the joined comparison table as a CSV (without row names) in the
# intermediate_data directory so future RMD files can reuse it.
lj_path <- here("intermediate_data", "streamflow_mc_lj.csv")
write.csv(lj, file = lj_path, row.names = FALSE)
# Manual checkpoints (x) against interpolated stripchart values (y). The extra
# `text` aesthetic carries the hover label for the interactive plot.
one_to_one_aes <- aes(
  x = manual_check,
  y = stripchart,
  group = 1,
  text = paste0("Manual Checkpoint (ft): ", manual_check, "\n",
                "Stripchart (ft): ", stripchart)
)

p_1to1_title <- paste0(
  "S2 Manual Checks vs Interpolated Stripchart Values (", min_year, "-", max_year, ")"
)

p_1to1 <- ggplot() +
  geom_point(data = lj, mapping = one_to_one_aes, alpha = 0.5) +
  # reference line y = x
  geom_abline(slope = 1, intercept = 0) +
  # identical limits on both axes
  xlim(0, 0.4) +
  ylim(0, 0.4) +
  theme_classic() +
  # centre the title and subtitle
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5)) +
  labs(
    title = p_1to1_title,
    subtitle = "The line y = x is plotted for reference.",
    x = "Manual Checkpoints",
    y = "Stripchart Data"
  )

#static plot
#p_1to1

#save the figure
# The output directory is resolved with here() so the PNG is written to the
# project-root figures directory (the one created during set up) no matter
# which working directory the document is knit from.
ggsave(filename = "streamflow_mc_comparison.png",
       plot = p_1to1,
       path = here("figures"),
       width = 6,
       height = 5,
       units = "in",
       dpi = 300)

#interactive plot; the hover labels come from the `text` aesthetic
ggplotly(p_1to1, tooltip = "text")

Note that this file is being knit as index.html into the manual_checks repository in order to update the GitHub pages website, where the plots can be viewed online.